#! /usr/bin/env python
#coding=utf-8

import os
import codecs

# Names of the per-region size fields. ProcessVmmap stores one value under each
# of these keys on every parsed region and sums them per object, per region
# type, and over the whole file.
VMMAP_SIZE_KEYS = ("vsize", "resident", "dirty", "swap")

class ProcessVmmap(dict):
	"""Dictionary holding the parsed content of a vmmap-style text dump.

	After construction the mapping contains two keys:

	* ``"vmmap_info"``: dict of the ``name: value`` header lines found before
	  the first ``REGION TYPE`` line (spaces removed from the names).
	* ``"regions"``: list of dicts, one per region line, with the keys
	  ``type``, ``addr``, ``vsize``, ``resident``, ``dirty``, ``swap``,
	  ``ugo``, ``sharemode``, ``purge``, ``detail`` and ``object``.

	Sizes are stored in kilobytes. Regions are additionally grouped into
	"objects" (regions sharing the same detail string, or the same type when
	the detail is empty); see get_all() and find_object().
	"""

	# Parser states, entered in this order while reading the file.
	PARSING_GLOBAL_INFO = 0
	PARSING_SECTIONS = 1
	PARSING_SUMMARY = 2

	# Multiplier converting a size with the given unit suffix to kilobytes.
	__UNIT_TO_KB = {
		"K": 1,
		"M": 1024,
		"G": 1024 * 1024,
		"T": 1024 * 1024 * 1024,
	}

	def __init__(self, filename):
		"""Parse *filename*; a missing file yields an empty result."""
		super(ProcessVmmap, self).__init__()
		self.__load(filename)

	def get_all(self):
		"""Return the list of aggregated objects built from the regions."""
		return self.__objects

	def get_all_regions(self):
		"""Return the list of parsed region dicts, in file order."""
		return self["regions"]

	def __parse_global_infos(self, line):
		"""Consume one header line.

		Returns True when the ``REGION TYPE`` table header is reached (the
		header part is finished), False otherwise. ``name: value`` lines are
		stored in ``self["vmmap_info"]``; lines without a usable name are
		ignored.
		"""
		if line.startswith("REGION TYPE "):
			return True
		sep = line.find(":")
		if sep <= 0:
			return False
		name = line[:sep].strip().replace(" ", "")
		self["vmmap_info"][name] = line[sep + 1:].strip()
		return False

	@staticmethod
	def __unnatural_size(s):
		"""Convert a size token such as ``16K``, ``1.5M`` or ``2G`` to kilobytes.

		Kilobyte values are returned as int when they are integral in the
		text, every other value as float. Tokens without a recognised unit
		suffix (K, M, G, T) yield 0. Raises ValueError when the numeric part
		cannot be parsed.
		"""
		s = s.strip()
		scale = ProcessVmmap.__UNIT_TO_KB.get(s[-1:])
		if scale is None:
			return 0
		number = s[:-1]
		if scale == 1:
			try:
				return int(number)
			except ValueError:
				# Fractional kilobyte value such as "12.5K".
				return float(number)
		return float(number) * scale

	def __parse_sections(self, line):
		"""Consume one line of the region tables.

		Returns True when the ``==== Legend`` marker is reached (the region
		tables are finished), False otherwise. Blank lines, ``====`` section
		titles and repeated ``REGION TYPE`` headers are skipped; every other
		line is parsed as a region and appended to ``self["regions"]``.

		Raises Exception when the line lacks a region type, an address range
		or a well-formed ``[vsize resident dirty swap]`` block.
		"""
		if line.startswith("==== Legend"):
			return True
		if line.strip() == "":
			return False
		if line.startswith(("====", "REGION TYPE")):
			return False

		open_pos = line.find("[")
		close_pos = line.find("]", open_pos + 1) if open_pos != -1 else -1

		# Everything before the size block is "<type> <start-end>". The type
		# may itself contain spaces, so the address range is taken as the
		# last whitespace-separated token and the type is whatever precedes it.
		head = line if open_pos == -1 else line[:open_pos]
		head_parts = head.rsplit(None, 1)
		if not head_parts:
			raise Exception("No region type")
		if len(head_parts) < 2:
			raise Exception("No addr start-end")
		if open_pos == -1 or close_pos == -1:
			raise Exception("Could not find memory size block '[]'")

		sizes = line[open_pos + 1:close_pos].split()
		if len(sizes) < 4:
			raise Exception("Invalid memory size block format")

		to_kb = ProcessVmmap.__unnatural_size
		region = {
			"type": head_parts[0].strip(),
			"addr": head_parts[1],
			"vsize": to_kb(sizes[0]),
			"resident": to_kb(sizes[1]),
			"dirty": to_kb(sizes[2]),
			"swap": to_kb(sizes[3]),
		}

		# After the size block: "<ugo> <sharemode> [PURGE=x] [detail...]".
		# Any trailing field may be absent; missing ones become "".
		tail = line[close_pos + 1:].split(None, 2)
		tail += [""] * (3 - len(tail))
		region["ugo"] = tail[0]
		region["sharemode"] = tail[1]

		rest = tail[2].strip()
		if rest.startswith("PURGE="):
			purge_and_detail = rest.split(None, 1)
			region["purge"] = purge_and_detail[0][len("PURGE="):]
			region["detail"] = purge_and_detail[1] if len(purge_and_detail) > 1 else ""
		else:
			region["purge"] = ""
			region["detail"] = rest

		self["regions"].append(region)
		return False

	def __parse_summary(self, line):
		"""Consume one line after the legend marker; nothing is extracted."""
		return False

	def find_object(self, name):
		"""Return the aggregated object called *name*, or None if unknown."""
		return self.__name_dict.get(name)

	def __merge_objects(self):
		"""Group the regions into objects and total their sizes.

		Regions are grouped by their detail string, falling back to the
		region type when the detail is empty. Each object is a dict with the
		keys ``name``, ``regions``, ``marcho`` (True when the name starts with
		"/" or "...") and one total per VMMAP_SIZE_KEYS entry. Every region
		receives an ``object`` key pointing at its object.
		"""
		self.__objects = []
		self.__name_dict = {}
		for region in self["regions"]:
			name = region["detail"] or region["type"]
			obj = self.find_object(name)
			if not obj:
				obj = {
					"name": name,
					"regions": [],
					"marcho": name.startswith(("/", "...")),
				}
				self.__objects.append(obj)
				self.__name_dict[name] = obj
			region["object"] = obj
			obj["regions"].append(region)

		for obj in self.__objects:
			for k in VMMAP_SIZE_KEYS:
				obj[k] = sum(region[k] for region in obj["regions"])

	def __load(self, filename):
		"""Read *filename* (UTF-8) and populate the mapping.

		A non-existent file is not an error: the mapping is left with an
		empty header dict and an empty region list.
		"""
		self["vmmap_info"] = {}
		self["regions"] = []

		if os.path.exists(filename):
			with codecs.open(filename, 'r', 'utf-8') as f:
				parsing_state = ProcessVmmap.PARSING_GLOBAL_INFO
				for line in f:
					line = line.strip()
					if parsing_state == ProcessVmmap.PARSING_GLOBAL_INFO:
						finished = self.__parse_global_infos(line)
					elif parsing_state == ProcessVmmap.PARSING_SECTIONS:
						finished = self.__parse_sections(line)
					else:
						finished = self.__parse_summary(line)
					if finished:
						parsing_state = parsing_state + 1

		self.__merge_objects()

	def getSum(self, prefix="vmmap_"):
		"""Return the totals of every size field over all regions.

		The result maps ``prefix + key`` to the sum, for each key in
		VMMAP_SIZE_KEYS.
		"""
		res = {}
		for k in VMMAP_SIZE_KEYS:
			res[prefix + k] = sum(r[k] for r in self["regions"])
		return res

	def get_region_types(self):
		"""Return per-type statistics, in order of first appearance.

		Each entry is a dict with the size totals (VMMAP_SIZE_KEYS), the
		number of regions under ``"regions"`` and the type under ``"name"``.
		"""
		res = []
		types_dict = {}
		for r in self["regions"]:
			t = types_dict.get(r["type"])
			if t is None:
				t = {}
				for k in VMMAP_SIZE_KEYS:
					t[k] = 0
				t["regions"] = 0
				t["name"] = r["type"]
				res.append(t)
				types_dict[r["type"]] = t
			for k in VMMAP_SIZE_KEYS:
				t[k] = t[k] + r[k]
			t["regions"] = t["regions"] + 1
		return res

	def report(self, fileName):
		"""Write all regions to *fileName* as a simple XML document.

		The file is written as UTF-8 (the encoding the input is read with)
		and field values are XML-escaped so that details containing ``&``,
		``<`` or ``>`` do not produce malformed output.
		"""
		from xml.sax.saxutils import escape

		fields = VMMAP_SIZE_KEYS + ("type", "addr", "ugo", "sharemode", "purge", "detail")
		with codecs.open(fileName, "w", "utf-8") as f:
			f.write("<regions>\n")
			for obj in self.get_all_regions():
				f.write("<region>\n")
				for k in fields:
					f.write("<%s>%s</%s>\n" % (k, escape("%s" % (obj[k],)), k))
				f.write("</region>\n")
			f.write("</regions>\n")

# Command-line entry point: parse every vmmap dump given as an argument and
# print its per-region-type statistics.
if __name__ == "__main__":
	import sys
	import traceback

	if len(sys.argv) <= 1:
		print("usage: %s vmmap_file1 vmmap_file2 ..." % os.path.basename(sys.argv[0]))
		sys.exit()

	for vmmap in sys.argv[1:]:
		try:
			print("Parsing vmmap file: %s" % vmmap)
			p = ProcessVmmap(vmmap)
			out = os.path.splitext(vmmap)[0] + "_vmmap.xml"
			# XML report generation is currently disabled:
			#p.report(out)
			print(p.get_region_types())
			print("  parsed vmmap file: %s" % out)
		except Exception:
			# Keep going with the remaining files, but do not swallow
			# KeyboardInterrupt / SystemExit as a bare "except:" would.
			traceback.print_exc()

